# 1. Exercises in R for Data Science 3.5.1 (2, 3)
#3.5.1(2)What do the empty cells in plot with facet_grid(drv ~ cyl) mean? How do they relate to this plot?
library(ggplot2)
# Plain scatterplot of cylinder count against drive train: a point appears
# only where that (drv, cyl) pairing occurs in mpg.
ggplot(mpg, aes(x = drv, y = cyl)) +
  geom_point()

# The same scatterplot split into a grid of panels, one row per drv value
# and one column per cyl value.
ggplot(mpg, aes(x = drv, y = cyl)) +
  geom_point() +
  facet_grid(drv ~ cyl)
# The empty cells in facet_grid(drv ~ cyl) mean that no observations in mpg have that combination of drv and cyl. They correspond to the positions in the drv vs. cyl scatterplot above where no point is drawn.
#3.5.1(3)What plots does the following code make? What does . do?
# Highway mileage against engine displacement, faceted by drv on the row
# side of the formula; the `.` leaves the column dimension unfaceted.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(drv ~ .)

# Same scatterplot, faceted by cyl on the column side of the formula; the
# `.` leaves the row dimension unfaceted.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(. ~ cyl)
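# The first plot facets by drv in rows only; the second facets by cyl in columns only. The . is a placeholder meaning "do not facet in this dimension": use it instead of a variable name on the rows or columns side of the formula.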
#3.6.1(6)Recreate the R code necessary to generate the following graphs
library(ggplot2)
# Build the six displ-vs-hwy graphs for exercise 3.6.1(6).
#
# `se` is an argument of geom_smooth(), not an aesthetic. Placed inside aes()
# it is dropped with "Ignoring unknown aesthetics: se" and the confidence
# ribbon is still drawn, so it is passed directly to geom_smooth() here.
# x and y are shared by every layer and therefore set once in ggplot().

# graph1: points sized by class, one smooth line over all the data.
graph1 <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(size = class)) +
  geom_smooth(se = FALSE)

# graph2: same points, one smooth line per drv group (no colour distinction).
graph2 <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(size = class)) +
  geom_smooth(mapping = aes(group = drv), se = FALSE)

# graph3: points and smooth lines both coloured by drv.
graph3 <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = drv, size = class)) +
  geom_smooth(mapping = aes(color = drv), se = FALSE)

# graph4: points coloured by drv, a single smooth line over all the data.
graph4 <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = drv, size = class)) +
  geom_smooth(se = FALSE)

# graph5: points coloured by drv, smooth lines distinguished by linetype.
graph5 <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = drv, size = class)) +
  geom_smooth(mapping = aes(linetype = drv), se = FALSE)

# graph6: points only, coloured by drv.
graph6 <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = drv, size = class))

# NOTE(review): par(mfrow = ...) configures base graphics only. ggplot2 draws
# with grid, so each graph below is still rendered on its own page; a grid
# layout of the six graphs would need a grid-based arrangement instead.
par(mfrow = c(3, 2))
graph1
## Warning: Using size for a discrete variable is not advised.
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
graph2
## Warning: Using size for a discrete variable is not advised.
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
graph3
## Warning: Using size for a discrete variable is not advised.
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
graph4
## Warning: Using size for a discrete variable is not advised.
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
graph5
## Warning: Using size for a discrete variable is not advised.
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
graph6
## Warning: Using size for a discrete variable is not advised.
# 3. Exercises in R for Data Science 5.2.4 (1, 2, 3, 4)
#5.2.4(1)
#1.Had an arrival delay of two or more hours
library(nycflights13)
library(tidyverse)
## ─ Attaching packages ────────────────────────────── tidyverse 1.2.1 ─
## ✔ tibble 1.4.2 ✔ purrr 0.2.5
## ✔ tidyr 0.8.1 ✔ dplyr 0.7.6
## ✔ readr 1.1.1 ✔ stringr 1.3.1
## ✔ tibble 1.4.2 ✔ forcats 0.3.0
## ─ Conflicts ─────────────────────────────── tidyverse_conflicts() ─
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Each call prints the rows of nycflights13::flights that satisfy one
# condition of exercise 5.2.4(1). Delays are in minutes.
filter(flights, arr_delay >= 120)

# 2. Flew to Houston (IAH or HOU)
filter(flights, dest %in% c("IAH", "HOU"))

# 3. Were operated by United, American, or Delta
filter(flights, carrier %in% c("AA", "DL", "UA"))

# 4. Departed in summer (July, August, and September)
filter(flights, month %in% 7:9)

# 5. Arrived more than two hours late, but didn’t leave late
filter(flights, arr_delay > 120 & dep_delay <= 0)

# 6. Were delayed by at least an hour, but made up over 30 minutes in flight
# (time made up in the air = departure delay minus arrival delay)
filter(flights, dep_delay >= 60 & (dep_delay - arr_delay > 30))

# 7. Departed between midnight and 6am (inclusive)
# dep_time is an HHMM integer in which midnight is recorded as 2400, not 0,
# so a `dep_time >= 0` lower bound never matches midnight departures; they
# have to be included explicitly.
filter(flights, dep_time <= 600 | dep_time == 2400)